
# =============================================================
# File: JOP_main.R
# Purpose: Produces results from main paper
# Paper: Foreignness as an Asset: European Carbon Regulation and the Relocation Threat among Multinational Firms
# Author: Patrick Bayer, patrick.bayer@strath.ac.uk
# Date: 5 October 2022
#
# Data: ./data.csv
#
# Technical disclaimer:
# All analyses in R version 4.1.2 (2021-11-01)
# RStudio 2022.07.1 Build 554 ("Spotted Wakerobin" Release (7872775e, 2022-07-22) for Windows)
# Windows 10 Enterprise, 64-bit
# 12th Gen Intel(R) Core(TM) i7-1270P 2.20 GHz with 32GB RAM
# =============================================================


# Load required libraries
library(lmtest)
library(sandwich)
library(MASS)
library(MatchIt)
library(cem)
library(cobalt)
library(cowplot)
library(ggpubr)
library(reshape2)
library(tidyverse)

# Load data
df <- read_csv(file = "./data.csv")


# =============================================================
# Descriptive information
# =============================================================

# Number of firms with ownership information
dim(df)

# Plants owned by MNCs vs non-MNCs
table(df$mnc)
table(df$mnc)[2]/sum(table(df$mnc))

# Ownership breakdown for MNCs
table(df$foreign[df$mnc==1])
table(df$foreign[df$mnc==1])[2]/sum(table(df$foreign[df$mnc==1]))


# =============================================================
# Table 1
# =============================================================

# Create firm-level data set

# Mode function
mode <- function(x) {
  ux <- unique(x)
  ux[which.max(tabulate(match(x, ux)))]
}

firm.level <- df %>%
                group_by(BVD) %>%
                summarise(owner=first(owner),count=n(), foreign.sum=sum(foreign), ctry.count=length(unique(iso2)), sector=mode(NACE)) %>% 
                mutate(foreign.share=foreign.sum/count,mnc=ifelse(foreign.share>0,1,0))

t.test(count~mnc, data=firm.level)

top15 <- head(firm.level[order(-firm.level$count),],n=15)
top15

# Additional information about top15 firms 
sum(top15$count)/sum(table(df$foreign))
sum(top15$foreign.sum)/table(df$foreign)[2]

# Comparison between Wienerberger and BASF
comp.dta <- df[df$BVD=="DE7150000030" | df$BVD=="AT9110026043",]

pct1 <- mean(comp.dta$alloc1[comp.dta$BVD=="DE7150000030" & comp.dta$foreign==1], na.rm=TRUE)/mean(comp.dta$alloc1[comp.dta$BVD=="DE7150000030" & comp.dta$foreign==0], na.rm=TRUE)
pct2 <- mean(comp.dta$alloc1[comp.dta$BVD=="AT9110026043" & comp.dta$foreign==1], na.rm=TRUE)/mean(comp.dta$alloc1[comp.dta$BVD=="AT9110026043" & comp.dta$foreign==0], na.rm=TRUE)

(pct2-1)/(pct1-1)

# Descriptive information on non-EU MNCs
table(df$owner.iso[df$noneu==1]) # Distribution of non-EU MNCs by country
sum(table(df$owner.iso[df$noneu==1])) # Number of plants owned by non-EU MNCs

# Firms from outside the EU
firm.level.noneu <- df[df$noneu==1,] %>%
                group_by(BVD) %>%
                summarise(owner=first(owner),count=n(), foreign.sum=sum(foreign), ctry.count=length(unique(iso2)), sector=mode(NACE)) %>% 
                mutate(foreign.share=foreign.sum/count,mnc=ifelse(foreign.share>0,1,0))

dim(firm.level.noneu) # Number of non-EU MNCs

# Top-5 non-EU MNC owners
top5.noneu <- head(firm.level.noneu[order(-firm.level.noneu$count),],n=5)
top5.noneu


# =============================================================
# Figure 1
# =============================================================

# Color settings: colorblind-friendly palette
# http://www.cookbook-r.com/Graphs/Colors_(ggplot2)/#a-colorblind-friendly-palette
cols <- c("#999999", "#E69F00", "#56B4E9", "#009E73", "#F0E442", "#0072B2", "#D55E00", "#CC79A7")

# =============================================================
# Panel A: By-country information
# =============================================================

# Create by country data set
by.ctry <- df %>%
            group_by(iso2) %>%
            summarise(count=n(), count.mnc=sum(mnc), mnc.foreign=sum(foreign)) %>%
            mutate(count.non.mnc=count-count.mnc, mnc.share=count.mnc/count, 
                   foreign.share=mnc.foreign/count.mnc, mnc.domestic=count.mnc-mnc.foreign) %>%
            print(n=nrow(.))

df.plot <- by.ctry[c("iso2", "count.non.mnc", "mnc.domestic", "mnc.foreign")]
table(by.ctry$count==rowSums(df.plot[,c(2:4)]))

df.plot <- melt(df.plot, id="iso2")
df.plot <- df.plot[order(df.plot$iso2),]

# Reorder for nicer plotting
df.plot$iso2 <- as.factor(df.plot$iso2)
df.plot$iso2 <- factor(df.plot$iso2, levels=rev(levels(df.plot$iso2)))
df.plot$variable <- factor(df.plot$variable, levels=rev(levels(df.plot$variable)))

# Exclude Liechtenstein (only 1 plant)
df.plot <- df.plot[df.plot$iso2!="LI",]
by.ctry <- by.ctry[by.ctry$iso2!="LI",]


# Split data sets in two halves for nicer plotting
df.plot.top <- df.plot[1:39,]
df.plot.bottom <- df.plot[40:78,]

by.ctry.top <- by.ctry[1:13,]
by.ctry.bottom <- by.ctry[14:26,]

                           
# Main plot
ctry.plot1 <- ggplot(data=df.plot.top, aes(x=value, y=iso2, group=variable, fill=variable)) + 
                  geom_bar(stat="identity") +
                  labs(x="Plant count", y="Country", title="Panel A: Plant Count by Country and Ownership Type \n") +
                  xlim(0,1700) +
                  scale_fill_manual(name="", values=c("count.non.mnc"=cols[1], "mnc.domestic"=cols[2], "mnc.foreign"=cols[3]),
                          labels=c("Non-MNCs", "MNC: Domestically-owned", "MNC: Foreign-owned")) +
                  theme_half_open(12) +
                  theme(legend.position="bottom") +
                  annotate("text", x=rep(1500,13), y=seq(1,13,1), 
                  label=paste(round(rev(by.ctry.top$mnc.share),2)*100,"%"), size=3)
ctry.plot1


ctry.plot2 <- ggplot(data=df.plot.bottom, aes(x=value, y=iso2, group=variable, fill=variable)) + 
                  geom_bar(stat="identity") +
                  labs(x="Plant count", y="Country", title="") +
                  xlim(0,1700) +
                  scale_fill_manual(name="", values=c("count.non.mnc"=cols[1], "mnc.domestic"=cols[2], "mnc.foreign"=cols[3]),
                          labels=c("Non-MNCs", "MNC: Domestically-owned", "MNC: Foreign-owned")) +
                  theme_half_open(12) +
                  theme(legend.position="bottom") +
                  annotate("text", x=rep(1500,13), y=seq(1,13,1), 
                  label=paste(round(rev(by.ctry.bottom$mnc.share),2)*100,"%"), size=3) + 
                  theme(legend.position="none", plot.title.position="plot")
ctry.plot2

# =============================================================
# Panel B: Distribution of DV
# =============================================================

# DV plots
df.plot <- df

# Create indicator for three ownership groups
df.plot$value <- ifelse(df.plot$mnc==0,1, ifelse(df.plot$mnc==1 & df.plot$foreign==0,2,3))

# Domestic vs foreign density
dv.plot <- ggplot(df.plot[is.na(df.plot$logDV)==FALSE,],aes(x=logDV, group=as.factor(value), fill=as.factor(value)))+
  geom_density(alpha=0.8)+
  labs(x="Permit allocation (log)", y="Density", title="Panel B: Permit Allocation \nby Ownership Type") +
  scale_fill_manual(values=c(cols[1],cols[2],cols[3]), name="", labels=c("Non-MNCs", "MNC: Domestically-owned", "MNC: Foreign-owned"))+
  coord_cartesian(ylim=c(0, 0.30)) +
  theme_half_open(12) +
  theme(legend.position="none", plot.title.position="plot")
dv.plot

# =============================================================
# Create joint plot
# =============================================================

# Extract legend information from plot
legend <- get_legend(ctry.plot1 + 
                       guides(color = guide_legend(nrow = 1)) +
                       theme(legend.position = "bottom", legend.justification="center"))

# Create plot without legend for joint plotting
ctry.plot_wo <- ctry.plot1 + theme(legend.position = "none", plot.title.position = "plot")

# Align plots
top.row <- align_plots(ctry.plot_wo, ctry.plot2, dv.plot, align='hv', axis='l')

# Align and order plots for final plotting
fig1 <- plot_grid(top.row[[1]], top.row[[2]], top.row[[3]], nrow=1, align='v', axis='l')

# Add common legend
# Note: Make sure to run the line using 'get_legend' above 
fig1_wlegend <- plot_grid(fig1, legend, ncol = 1, rel_heights = c(1, 0.1))
fig1_wlegend

# Export to PDF
ggexport(fig1_wlegend, filename="fig1.pdf", width=10, height=5)



# =============================================================
# Main results (Table 2)
# =============================================================

# Trim sample down to within firm sample
# (1) Limit sample to European MNCs: no allocation data for domestically-owned operations for non-EU MNCs
# (2) Exclude MNCs with only foreign-owned operations as there is no within MNC variation

df$sample <- ifelse(df$mnc.eu==1 & df$foreign.share<1,1,0)
df <- df[df$sample==1,]
df$foreign <- as.factor(df$foreign) # set variable as categorical
table(df$foreign.share)


# =============================================================
# Model (1)
# =============================================================

m1 <- lm(logDV~foreign+logAlloc0+logEmit0+as.factor(iso2)+as.factor(BVD), data=df)
summary(m1)


# Observations
nobs(m1)
length(m1$xlevels$`as.factor(iso2)`)
length(m1$xlevels$`as.factor(BVD)`)

# Marginal effects
(exp(summary(m1)$coef[["foreign1","Estimate"]])-1)*100
est.m1 <- (exp(summary(m1)$coef[["foreign1","Estimate"]])-1)*100

# Standard errors
(exp(coefci(m1, vcov. = vcovHC, type="HC0")["foreign1",])-1)*100 # robust SEs
ci.m1 <- (exp(coefci(m1, vcov. = vcovHC, type="HC0")["foreign1",])-1)*100 # robust SEs


# =============================================================
# Model (2)
# =============================================================

m2 <- lm(logDV~foreign+logAlloc0+logEmit0+as.factor(iso2)+as.factor(ETSsector)+as.factor(BVD), data=df)
summary(m2)

# Observations
nobs(m2)
length(m2$xlevels$`as.factor(ETSsector)`)
length(m2$xlevels$`as.factor(iso2)`)
length(m2$xlevels$`as.factor(BVD)`)

# Marginal effects
(exp(summary(m2)$coef[["foreign1","Estimate"]])-1)*100
est.m2 <- (exp(summary(m2)$coef[["foreign1","Estimate"]])-1)*100

# Standard errors
(exp(coefci(m2, vcov. = vcovHC, type="HC0")["foreign1",])-1)*100 # robust SEs
ci.m2 <- (exp(coefci(m2, vcov. = vcovHC, type="HC0")["foreign1",])-1)*100 # robust SEs


# =============================================================
# Model (3): NACE-4 matching
# =============================================================

# Exact matching on 4-digit NACE code and firm
df$NACE <- ifelse(df$NACE=="",NA,df$NACE) # Recode empty strings as NA 

m <- matchit(foreign ~ NACE + BVD, data=df[is.na(df$NACE)==FALSE,], method="exact")
df.match <- match.data(m)


# Re-run main model on matched sample
m3 <- lm(logDV~foreign+logAlloc0+logEmit0+as.factor(iso2)+as.factor(ETSsector)+as.factor(BVD), data=df.match, weights=weights)
summary(m3)


# Observations
nobs(m3)
length(m3$xlevels$`as.factor(ETSsector)`)
length(m3$xlevels$`as.factor(iso2)`)
length(m3$xlevels$`as.factor(BVD)`)

# Marginal effects
(exp(summary(m3)$coef[["foreign1","Estimate"]])-1)*100

# Standard errors
(exp(coefci(m3, vcov. = vcovHC, type="HC0")["foreign1",])-1)*100 # robust SEs


# =============================================================
# Figure 2: Matching results
# =============================================================

# Code provided here reproduces Figure 2 in the main paper
# Code for the full matching results is in App_G_matching.R and section G "Matching Analysis" in the appendix

# =============================================================
# Mahalanobis matching on total allocation (log) variable
# =============================================================

# Set color-blind friendly color palette
# http://www.cookbook-r.com/Graphs/Colors_(ggplot2)/#a-colorblind-friendly-palette
cols <- c("#999999", "#E69F00", "#56B4E9", "#009E73", "#F0E442", "#0072B2", "#D55E00", "#CC79A7")


# Mahalanobis matching
df.match <- df[,c("logDV","logAlloc0","foreign", "logEmit0","ETSsector","iso2","BVD", "mnc", "sample")]
df.match <- data.frame(na.omit(df.match))

# Check data before matching
m.out0 <- matchit(foreign ~ logAlloc0, data=df.match, method=NULL, distance="glm")

# Matching
m.out1 <- matchit(as.numeric(foreign) ~ logAlloc0, data=df.match, method="nearest", distance="mahalanobis")
df.match.post <- match.data(m.out1)
dim(df.match.post)

# Model (1): matched sample
mmm1 <- lm(logDV~foreign+logAlloc0+logEmit0+as.factor(iso2)+as.factor(BVD), data=df.match.post, weights=weights) # main model
est.mmm1 <- (exp(summary(mmm1)$coef[["foreign1","Estimate"]])-1)*100 # marginal effect
ci.mmm1 <- (exp(coefci(mmm1, vcov. = vcovHC, type="HC0")["foreign1",])-1)*100 # robust SEs

# Model (2): matched sample
mmm2 <- lm(logDV~foreign+logAlloc0+logEmit0+as.factor(iso2)+as.factor(ETSsector)+as.factor(BVD), data=df.match.post, weights=weights) # main model
est.mmm2 <- (exp(summary(mmm2)$coef[["foreign1","Estimate"]])-1)*100 # marginal effect
ci.mmm2 <- (exp(coefci(mmm2, vcov. = vcovHC, type="HC0")["foreign1",])-1)*100 # robust SEs

# Create data frame with model coefficients for plotting
# Note: Make sure to run the code under "Main Results (Table 2)" above

coef <- c(est.mmm1, est.mmm2, est.m1, est.m2)
cilo <- c(ci.mmm1[1], ci.mmm2[1], ci.m1[1], ci.m2[1])
ciup <- c(ci.mmm1[2], ci.mmm2[2], ci.m1[2], ci.m2[2])
type <- c(rep("Matched", 2), rep("Unmatched", 2))
model <- c(0.95,1.55,1.05,1.65)

df.plot <- data.frame(model,type,coef,cilo,ciup)

# =============================================================
# Plot 1: Matching statistics
# =============================================================

# Standardized mean differences (smds) are negative, but univariate imbalance measure is positive
# To produce a plot that is on the positive scale, absolute values of smds are used

# Standardized mean difference measures
value <- abs(c(summary(m.out1)$sum.all[,3],summary(m.out1)$sum.matched[,3]))
name <- rep(c("SMD"),2)
sample <- c("unmatched","matched")
df.smd <- data.frame(name, sample, value)

# Univariable imbalance measure
value <- c(imbalance(group=df.match$foreign, data=df.match["logAlloc0"])$tab$L1, imbalance(group=df.match.post$foreign, data=df.match.post["logAlloc0"])$tab$L1)
name <- rep(c("L1"),2)
sample <- c("unmatched","matched")
df.imb <- data.frame(name, sample, value)

# Data that contains all the above information
df.stat <- rbind(df.smd,df.imb)

# Plot of matching statistics
p.stat <- ggplot(data=df.stat, aes(x=name, y=value, color=sample)) + 
  geom_point(shape=19, size=2) +
  coord_flip() +
  scale_colour_manual(values=c(cols[3],cols[1]), name="Sample", labels=c("Unmatched", "Matched")) +
  labs(x="", y="Matching statistic", title="Panel A: Mahalanobis Matching on Total Permit Allocation") +
  scale_x_discrete(limits=rev(levels(df.stat$name))) +
  geom_hline(yintercept=0, lty=2) +
  scale_y_continuous(limits=c(0,0.05), breaks=c(0, 0.01, 0.02, 0.03, 0.04, 0.05)) +
  theme_half_open(12) +
  theme(legend.position="none", plot.title.position="plot")
p.stat

# =============================================================
# Plot 2: Coefficient plot
# =============================================================


p.coef <- ggplot(df.plot,aes(x=model, y=coef, color=factor(type))) +
  scale_x_continuous(limits=c(0.9,1.7), breaks=c(1,1.6), 
                     labels=c("Model (1)", "Model (2)")) +
  geom_hline(yintercept=0, lty=2) +
  scale_y_continuous(limits=c(0,30), breaks=c(0,5,10,15,20,25,30)) +
  geom_errorbar(aes(ymin=cilo, ymax=ciup, color=factor(type)), width=0, lwd=1) +
  geom_point(aes(y=coef, color=factor(type)), shape=19, size=2) +
  scale_colour_manual(values=c(cols[3],cols[1]), name="Sample", labels=c("Matched", "Unmatched")) +
  labs(x="Models", y="Marginal effect", title="") +
  theme(legend.position="bottom") +
  annotate(geom="text", x=1.3 , y=27.5, label="MNC sample", vjust=0) +
  geom_segment(x=1, y=27, xend=1.6, yend=27, color="black", lwd=.5) +
  theme_half_open(12)
p.coef

# Extract legend information from plot
legend <- get_legend(p.coef + 
                       guides(color = guide_legend(nrow = 1)) +
                       theme(legend.position = "bottom", legend.justification="center"))

# Create plot without legend for joint plotting
p.coef_wo <- p.coef + theme(legend.position = "none", plot.title.position = "plot")

# Align both plots
top.row <- align_plots(p.stat, p.coef_wo, align='hv', axis='l')



# =============================================================
# Mahalanobis matching on total emissions (log) variable
# =============================================================

# Mahalanobis matching
df.match <- df[,c("logDV","logAlloc0","foreign", "logEmit0","ETSsector","iso2","BVD", "mnc", "sample")]
df.match <- data.frame(na.omit(df.match))

# Check data before matching
m.out0 <- matchit(foreign ~ logEmit0, data=df.match, method=NULL, distance="glm")

# Matching
m.out1 <- matchit(as.numeric(foreign) ~ logEmit0, data=df.match, method="nearest", distance="mahalanobis")
df.match.post <- match.data(m.out1)
dim(df.match.post)

# Model (1): matched sample
mmm1 <- lm(logDV~foreign+logAlloc0+logEmit0+as.factor(iso2)+as.factor(BVD), data=df.match.post, weights=weights) # main model
est.mmm1 <- (exp(summary(mmm1)$coef[["foreign1","Estimate"]])-1)*100 # marginal effect
ci.mmm1 <- (exp(coefci(mmm1, vcov. = vcovHC, type="HC0")["foreign1",])-1)*100 # robust SEs

# Model (2): matched sample
mmm2 <- lm(logDV~foreign+logAlloc0+logEmit0+as.factor(iso2)+as.factor(ETSsector)+as.factor(BVD), data=df.match.post, weights=weights) # main model
est.mmm2 <- (exp(summary(mmm2)$coef[["foreign1","Estimate"]])-1)*100 # marginal effect
ci.mmm2 <- (exp(coefci(mmm2, vcov. = vcovHC, type="HC0")["foreign1",])-1)*100 # robust SEs

# Create data frame with model coefficients for plotting
# Note: Make sure to run the code under "Main Results (Table 2)" above

coef <- c(est.mmm1, est.mmm2, est.m1, est.m2)
cilo <- c(ci.mmm1[1], ci.mmm2[1], ci.m1[1], ci.m2[1])
ciup <- c(ci.mmm1[2], ci.mmm2[2], ci.m1[2], ci.m2[2])
type <- c(rep("Matched", 2), rep("Unmatched", 2))
model <- c(0.95,1.55,1.05,1.65)

df.plot <- data.frame(model,type,coef,cilo,ciup)

# =============================================================
# Plot 1: Matching statistics
# =============================================================

# Standardized mean differences (smds) are negative, but univariate imbalance measure is positive
# To produce a plot that is on the positive scale, absolute values of smds are used

# Standardized mean difference measures
value <- abs(c(summary(m.out1)$sum.all[,3],summary(m.out1)$sum.matched[,3]))
name <- rep(c("SMD"),2)
sample <- c("unmatched","matched")
df.smd <- data.frame(name, sample, value)

# Univariable imbalance measure
value <- c(imbalance(group=df.match$foreign, data=df.match["logEmit0"])$tab$L1, imbalance(group=df.match.post$foreign, data=df.match.post["logEmit0"])$tab$L1)
name <- rep(c("L1"),2)
sample <- c("unmatched","matched")
df.imb <- data.frame(name, sample, value)

# Data that contains all the above information
df.stat <- rbind(df.smd,df.imb)

# Plot of matching statistics
p.stat <- ggplot(data=df.stat, aes(x=name, y=value, color=sample)) + 
  geom_point(shape=19, size=2) +
  coord_flip() +
  scale_colour_manual(values=c(cols[3],cols[1]), name="Sample", labels=c("Unmatched", "Matched")) +
  labs(x="", y="Matching statistic", title="Panel B: Mahalanobis Matching on Total Emissions") +
  scale_x_discrete(limits=rev(levels(df.stat$name))) +
  geom_hline(yintercept=0, lty=2) +
  scale_y_continuous(limits=c(0,0.05), breaks=c(0, 0.01, 0.02, 0.03, 0.04, 0.05)) +
  theme_half_open(12) +
  theme(legend.position="none", plot.title.position="plot")
p.stat

# =============================================================
# Plot 2: Coefficient plot
# =============================================================

p.coef <- ggplot(df.plot,aes(x=model, y=coef, color=factor(type))) +
  scale_x_continuous(limits=c(0.9,1.7), breaks=c(1,1.6), 
                     labels=c("Model (1)", "Model (2)")) +
  geom_hline(yintercept=0, lty=2) +
  scale_y_continuous(limits=c(0,30), breaks=c(0,5,10,15,20,25,30)) +
  geom_errorbar(aes(ymin=cilo, ymax=ciup, color=factor(type)), width=0, lwd=1) +
  geom_point(aes(y=coef, color=factor(type)), shape=19, size=2) +
  scale_colour_manual(values=c(cols[3],cols[1]), name="Sample", labels=c("Matched", "Unmatched")) +
  labs(x="Models", y="Marginal effect", title="") +
  theme(legend.position="bottom") +
  annotate(geom="text", x=1.3 , y=27.5, label="MNC sample", vjust=0) +
  geom_segment(x=1, y=27, xend=1.6, yend=27, color="black", lwd=.5) +
  theme_half_open(12)
p.coef

# Create plot without legend for joint plotting
p.coef_wo <- p.coef + theme(legend.position = "none", plot.title.position = "plot")

# Align both plots
bottom.row <- align_plots(p.stat, p.coef_wo, align='hv', axis='l')


# Align and order plots for final plotting
full <- plot_grid(top.row[[1]], top.row[[2]], bottom.row[[1]], bottom.row[[2]], nrow=2, align='v', axis='l')

# Add common legend
# Note: Make sure to run the line using 'get_legend' above 
full_wlegend <- plot_grid(full, legend, ncol = 1, rel_heights = c(1, 0.1))
full_wlegend

# Export to PDF
ggexport(full_wlegend, filename="fig2.pdf", width=9, height=8)



# =============================================================
#                       END OF CODE
# =============================================================

